/* For each DGP, and for both ES and ESMV bin selectors, compute the Gini
coefficients. X axis: bin number / total number of bins. Y axis: cumulative
number of observations / total number of observations.
Remember that the observations are sorted by "wealth", i.e. the bins with the
fewest number of observations go first.
The Gini coefficient can then be computed as 1 - 2B, where B is the sum of the
area of each resulting trapezoid. The trapezoids all have the same "height", 
1/number of bins, and have bases equal to the Y values defined above. There is
an implicit point at (0,0). There is an explicit point at (1,1).
Average the Gini coefficients for each DGP over the seeds.
*/

cap log close
log using "${logs}/compute_gini_coefficients.log", replace

set obs 11
gen paper = ""
gen gini_es = .
gen gini_esmv = .

tempfile d
save "`d'"

foreach paper in "ADGP1" "BDGP2" "CDGP3" "DDGP4" "EDGP5" "FDGP6" ///
		"GDGP7" "HDGP8" "IDGP9" "JDGP10" "KDGP11" {
		
	foreach rng in 1 2 3 4 5 6 7 8 {
	
		use "${sim_dat}/`paper'_0_`rng'.dta", clear
	
		* Force at least five bins per side
		rdplot7511 y x, binselect(es) graph_options(nodraw)
		local nbins_l = max(5, e(J_star_l))
		local nbins_r = max(5, e(J_star_r))
		* Yes vline, no fit, small binwidth, ES
		rdplot7511 y x, binselect(es) nbins(`nbins_l' `nbins_r') graph_options(nodraw) genvars
		
		* Correct obs/bin in older version of rdplot
		replace rdplot_N = rdplot_N + 1
		replace rdplot_N = rdplot_N / _N
		collapse (first) rdplot_N, by(rdplot_id)
		sum rdplot_id
		replace rdplot_id = (rdplot_id + abs(r(min)) + 1) / (_N + 1)
		* Add point at 0		
		set obs `= _N + 1'
		replace rdplot_id = 0 if rdplot_id == .
		replace rdplot_N = 0 if rdplot_N == .
		sort rdplot_N
		
		gen rdplot_pct_cum = sum(rdplot_N)
		gen rdplot_pct_cum_prev = rdplot_pct_cum[_n-1]
		gen area_under_trap = (rdplot_pct_cum + rdplot_pct_cum_prev) / (2 * (_N - 1))
		
		collapse (sum) area_under_trap
		sum area_under_trap
		local lorenz_es = r(mean)
		
		use "${sim_dat}/`paper'_0_`rng'.dta", clear
	
		* Force at least five bins per side
		rdplot7511 y x, binselect(esmv) graph_options(nodraw)
		local nbins_l = max(5, e(J_star_l))
		local nbins_r = max(5, e(J_star_r))
		* Yes vline, no fit, small binwidth, ES
		rdplot7511 y x, binselect(esmv) nbins(`nbins_l' `nbins_r') graph_options(nodraw) genvars
		
		replace rdplot_N = rdplot_N + 1
		replace rdplot_N = rdplot_N / _N
		collapse (first) rdplot_N, by(rdplot_id)
		sum rdplot_id
		replace rdplot_id = (rdplot_id + abs(r(min)) + 1) / (_N + 1)
		* Add point at 0		
		set obs `= _N + 1'
		replace rdplot_id = 0 if rdplot_id == .
		replace rdplot_N = 0 if rdplot_N == .
		sort rdplot_N
		
		gen rdplot_pct_cum = sum(rdplot_N)
		gen rdplot_pct_cum_prev = rdplot_pct_cum[_n-1]
		gen area_under_trap = (rdplot_pct_cum + rdplot_pct_cum_prev) / (2 * (_N - 1))
		
		collapse (sum) area_under_trap
		sum area_under_trap
		local lorenz_esmv = r(mean)
		
		clear
		set obs 1
		gen paper = "`paper'"
		gen gini_es = 1 - 2 * `lorenz_es'
		gen gini_esmv = 1 - 2 * `lorenz_esmv'
		append using "`d'"
		save "`d'", replace
		
	}
	
}

collapse (mean) gini_es gini_esmv, by(paper)
drop if paper == ""

label variable paper "DGP"
label variable gini_es "Gini coefficient (based on inequality of number of observations per bin) using CCT's evenly spaced IMSE-optimal bin numbers"
label variable gini_es "Gini coefficient (based on inequality of number of observations per bin) using CCT's evenly spaced mimicking variance bin numbers"

save "${dat}/gini_coefficients_rd.dta", replace

log close
